# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy
import datetime
from scrapy.loader.processors import MapCompose, TakeFirst, Join
from scrapy.loader import ItemLoader
import re


class ArticlespiderItem(scrapy.Item):
    """Placeholder item generated with the project; it declares no fields.

    Fields would be declared here in the form ``name = scrapy.Field()``.
    """


def date_convert(value):
    """
    Clean up a raw creation-date string.

    The '·' separator is removed first and the result is then stripped, so
    whitespace that sat between the date and the separator does not survive.

    :param value: raw date text
    :return: the cleaned string; today's date (``datetime.date``) when
        ``value`` is not a text string and therefore cannot be cleaned
    """
    try:
        # Order matters: stripping before removing the separator would leave
        # the blank that preceded a trailing '·' in the result.
        create_date = value.replace('·', '').strip()
    except (AttributeError, TypeError):
        # Non-text input (e.g. None or bytes): fall back to the current date.
        create_date = datetime.datetime.now().date()
    return create_date


def get_nums(value):
    """
    Extract a count (comments, likes, favourites) from a piece of text.

    :param value: text that may contain a number, e.g. ' 12 收藏'
    :return: the first run of digits in ``value`` as an int, or 0 when the
        text contains no digit
    """
    # re.search locates the first complete digit run. The former pattern
    # '.*(\d+).*' let the greedy '.*' swallow all but the last digit, so a
    # value such as '12' was reported as 2.
    found = re.search(r'\d+', value)
    if found is None:
        return 0
    return int(found.group())


def return_value(value):
    """
    Identity function: hand the argument back untouched.

    :param value: any object
    :return: the very same object that was passed in
    """
    return value


def remove_commont_tags(value):
    """
    Blank out a tag entry that is really the comment counter.

    :param value: one tag string
    :return: an empty string when ``value`` contains '评论', otherwise
        ``value`` unchanged
    """
    is_comment_entry = '评论' in value
    return '' if is_comment_entry else value


class ArticleItemLoader(ItemLoader):
    # Item loader whose default output processor is TakeFirst(); a field
    # that declares its own output_processor is presumably unaffected by
    # this default (standard Scrapy behaviour - confirm against the
    # installed version).
    default_output_processor = TakeFirst()


class JobboleArticleItem(scrapy.Item):
    """
    Item definition for an article.

    Processing functions are attached to the fields here so that less
    clean-up has to be done while the data is being captured.
    """
    title = scrapy.Field()
    createDate = scrapy.Field(input_processor=MapCompose(date_convert))
    url = scrapy.Field()
    url_object_id = scrapy.Field()
    # ArticleSpiderImagePipeline needs the image addresses as a list, so this
    # field must not use the TakeFirst processor inherited as the loader
    # default; the custom pass-through function returns whatever it is given.
    front_image_url = scrapy.Field(output_processor=MapCompose(return_value))
    front_image_path = scrapy.Field()
    # The three counters share the same number-extraction input processor.
    praise_nums = scrapy.Field(input_processor=MapCompose(get_nums))
    commont_nums = scrapy.Field(input_processor=MapCompose(get_nums))
    fav_nums = scrapy.Field(input_processor=MapCompose(get_nums))
    tags = scrapy.Field(
        input_processor=MapCompose(remove_commont_tags), output_processor=Join(',')
    )
    content = scrapy.Field()
